######    Computes Malmquist indices    #######
###     Imports two datasets for before and after periods
###     they do not need to be limited to the same schools
###     But they need to have id codes for the schools
###     The code computes the index only for those in both datasets
###     eh?


##   Compile (run) functons at the bottom first
##   Then run this top part down to where it says "run to here before using functions

##  Calls to the functions are of the form:
##  Malmq <- function(before, after, show = TRUE)
## where before is the matrix of schools in the earlier period, after is the matrix of schools in the later period
## show tells R whether to show the results after running it (default is TRUE, which means if you do not put a value in for it, it assumes it to be TRUE)


rm(list=ls())   ##  this just clears the memory to avoid confusion of things already in memory 

###   Set the root directory of the project here, eh?
setwd("/Users/michael/Desktop/Working")
####  Set years for comparison
#############################

before.year <- 2007    ## earlier year in comparison
after.year  <- 2011    ## later year in comparison

###########################
##   specify these please

m <- 2         ## number of inputs
s <- 2         ## number of good outputs
h <- 1         ## number of bad outputs
nd <- 0        ## number of nondiscretionary variables  

##  Thank you... 

echo=FALSE;


library(lpSolve)

#########################################################
###   Import the two datasets
###   These are set as paths relative to the root directory

before.mat <- read.table((paste(getwd(),"/data/final/final_data",before.year,".csv",sep="")), header = TRUE, sep = ",", stringsAsFactors = FALSE)
after.mat <- read.table((paste(getwd(),"/data/final/final_data",after.year,".csv",sep="")), header = TRUE, sep = ",", stringsAsFactors = FALSE)



############################
##  subset only variables needed from each dataset

attach(before.mat)
before.df <- data.frame(unitid, instnm, CarnClass,    ##  identifiers
                      InstrExp, ResExp,     ##  inputs
                      FTE, TotRes,      ##  good outputs
                      totemiss12           ##  bad outputs
                      #,hdd = hdd, cdd = cdd    ##  non-discretionary vars
                      )       
detach(before.mat)

attach(after.mat)
after.df <- data.frame(unitid, instnm, CarnClass,    ##  identifiers
                      InstrExp, ResExp,     ##  inputs
                      FTE, TotRes,      ##  good outputs
                      totemiss12           ##  bad outputs
                      #,hdd = hdd, cdd = cdd   ##  non-discretionary vars
                      )       
detach(after.mat)


####################################
####   Create subsets of schools


R.before <- subset(before.df, (CarnClass == "R1" | CarnClass == "R2" | CarnClass == "R"))
R.after <- subset(after.df, (CarnClass == "R1" | CarnClass == "R2" | CarnClass == "R"))

AS.before <- subset(before.df, (CarnClass == "AS" ))
AS.after <- subset(after.df, (CarnClass == "AS" ))



##########  run to here before using functions


########################################
########################################

####  Code below will produce Table 7 in the paper

R.mq <- Malmq(R.before,R.after)
R.mq <- R.mq[,-1]

## format results into Latex and create table
latex(format.df(R.mq,dec = 4), file = "", rowname = NULL, caption = "Change in environmental efficiency: research institutions", label = "Rcompare")

####  Code below will produce Table 8 in the paper

AS.mq <- Malmq(AS.before,AS.after)
AS.mq <- AS.mq[,-1]

## format results into Latex and create table
latex(format.df(AS.mq,dec = 4), file = "", rowname = NULL, caption = "Change in environmental efficiency: Liberal Arts schools", label = "AScompare")





     


###################   Functions
###########################################

##        Malmquist index:
##         
 
##         Latexing the output (which is a dataframe) makes a nice table. 

Malmq <- function(before, after, show = TRUE){
IF.b_xy.b <- env.eff(before, before, optim = FALSE, show = FALSE)$eff.scores
IF.a_xy.b <- env.eff(after, before, optim = FALSE, show = FALSE)$eff.scores
IF.b_xy.a <- env.eff(before, after, optim = FALSE, show = FALSE)$eff.scores
IF.a_xy.a <- env.eff(after, after, optim = FALSE, show = FALSE)$eff.scores

#########################################################
##  match up schools in both datasets
##  has to be done each way since there may be schools in one or the other that don't have matches
##  and has to be done separately for each subset, due to the fact that some schools switched
##  Carnegie classifications between 2005 and 2010
bb.df <- data.frame(unitid = before$unitid, instnm = before$instnm, eff <- IF.b_xy.b)
ab.df <- data.frame(unitid = before$unitid, instnm = before$instnm, eff <- IF.a_xy.b)
ba.df <- data.frame(unitid = after$unitid, instnm = after$instnm, eff <- IF.b_xy.a)
aa.df <- data.frame(unitid = after$unitid, instnm = after$instnm, eff <- IF.a_xy.a)


match.vec <- match(bb.df$unitid, ba.df$unitid)
bb.df <- cbind(bb.df, match.vec)
bb.df <- subset(bb.df, match.vec != "NA")
ab.df <- cbind(ab.df, match.vec)
ab.df <- subset(ab.df, match.vec != "NA")

match.vec <- match(ba.df$unitid, bb.df$unitid)
ba.df <- cbind(ba.df, match.vec)
ba.df <- subset(ba.df, match.vec != "NA")
aa.df <- cbind(aa.df, match.vec)
aa.df <- subset(aa.df, match.vec != "NA")

mq_index <- (bb.df$eff * ab.df$eff)/(ba.df$eff * aa.df$eff)
eff.change <- bb.df$eff/aa.df$eff
tech.change <- sqrt((aa.df$eff * ab.df$eff)/(ba.df$eff * bb.df$eff))

display <- data.frame(unitid = bb.df$unitid
                      ,Institution = bb.df$instnm 
                       ,eff.change 
                       ,tech.change
                       , Malmquist = mq_index
                       )
                          
                          if(show == TRUE){
                          	print(display)}
      return(display)
      }
      



###########################################

##        efficiency table:
##         This function runs all four models
##         and produces a table of efficiency scores for all of them
##         along with schools names
##         It is meant to easily produce tables for the paper. 
##         Latexing the output (which is a dataframe) makes a nice table. 

eff.table <- function(r.dat, e.dat = r.dat, show = TRUE){
oe <- op.eff(r.dat, e.dat, show = FALSE)
te <- tot.eff(r.dat, e.dat, show = FALSE)
ee.opt <- env.eff(r.dat, e.dat, optim = TRUE, show = FALSE)
ee.sq <- env.eff(r.dat, e.dat, optim = FALSE, show = FALSE)

tot.display <- data.frame(Institution = e.dat$instnm, 
                          OpEff = oe$eff.scores, 
                          EnvEff = ee.opt$eff.scores,
                          EnvEffStatQuo = ee.sq$eff.scores,
                          TotEff = te$eff.scores)
                          
                          if(show == TRUE){
                          	print(tot.display)}
      return(tot.display)
      }
      
      
###########################################
##            operational efficiency 

op.eff <-  function (roe.dat, eoe.dat = roe.dat, show = TRUE) 
{

ref.mat <- as.matrix(roe.dat[,4:ncol(roe.dat)])
eval.mat <- as.matrix(eoe.dat[,4:ncol(eoe.dat)])

n <- nrow(ref.mat)
n.dmu <- nrow(eval.mat)

#X <- ref.mat[,3:(m+2)]
#Y <- ref.mat[,(m+3):(m+2+s)]
#U <- ref.mat[,(2+m+s+1):ncol(dat.mat)]

X <- ref.mat[,1:m]
Y <- ref.mat[,(m+1):(m+s)]
U <- ref.mat[,(1+m+s):(m+s+h)]
ND <- ref.mat[,(1+m+s+h):(m+s+h+nd)]

######   Range variables

y.upper <- apply(Y,2,max)
y.lower <- apply(Y,2,min)
x.upper <- apply(X,2,max)
x.lower <- apply(X,2,min)
u.upper <- max(U) #apply(U,2,max)
u.lower <- min(U) #apply(U,2,min)

R.g <- 1/((m+s)* (y.upper - y.lower))  ## range for goods
R.x <- 1/((m+s)* (x.upper - x.lower))  ## range for inputs
#R.b <- 1/(h* (u.upper - u.lower))      ## range for bads

xynd.mat <- cbind(X,Y,ND)
xyund.mat <- cbind(X,Y, U, ND)

xynd.base <- eval.mat[,-((m+s+1):(m+s+h))] 
    

obj <- c(R.x,R.g, rep(0,n))
f.con1 <- as.matrix(t(xynd.mat))
con2.signs <- c(rep(1,m), rep(-1,s))

nd.rows <- matrix(0,nd,(m+s))  ## add constraint rows for non-discretionary variables                                             
f.con2 <- rbind(diag(con2.signs),nd.rows)

constr <- cbind(f.con2, f.con1)  ## input and output constraints

l.con <- t(c(rep(0,(m+s)), rep(1,n)))   ## convexity, sum of lambdas = 1

l.con2 <- cbind( (matrix(0,n,m+s)), diag(1,n))  ##  constrain each lambda to be > 0
constr <- rbind(constr,l.con, l.con2)

#constr <- rbind(constr,l.con)

s.con <- cbind(diag(1,(m+s)), matrix(0,(m+s),n))     ##  nonnegativity constraints on slacks
constr <- rbind(constr,s.con)    ##  full matrix of constraints

##  vector of signs for constraints
f.dir <- c(rep("==", (m+s+nd+1)), rep(">=", (m+s+n)))

      results <- matrix(0,n.dmu,(m+s+n))
      for (i in 1:n.dmu){
                     rhs <-  c(t(xynd.base[i,]),1,rep(0,(m+s+n)))
                     sol <- lp("max", obj, constr, f.dir, rhs)$solution; #print(sol)
                     results[i,] <- sol
                     }
opt.slax <- results[,1:(m+s)]
                     
eff.scores <- 1- opt.slax %*% obj[1:(m+s)]
if(show == TRUE){
	print("efficiency scores")
	oe.df <- data.frame(institution = eoe.dat$instnm, op.eff = eff.scores)
     print(oe.df) 
    } 

op.eff.output <- list(slax.df = data.frame(instnm = eoe.dat$instnm, results[,1:(m+s)]),opt.slax = results[,1:(m+s)], lambdas = results[,(m+s+1):ncol(results)], eff.scores = eff.scores)
    return(op.eff.output)
}

###########################################
####   environmental efficiency

env.eff <- function (ree.dat, eee.dat = ree.dat, optim = FALSE, show = TRUE) 
{

ref.mat <- as.matrix(ree.dat[,4:ncol(ree.dat)])
eval.mat <- as.matrix(eee.dat[,4:ncol(eee.dat)])


n <- nrow(ref.mat)
n.dmu <- nrow(eval.mat)

#X <- ref.mat[,4:(m+3)]
#Y <- ref.mat[,(m+4):(m+3+s)]
#U <- ref.mat[,(3+m+s+1):ncol(dat.mat)]

X <- ref.mat[,1:m]
Y <- ref.mat[,(m+1):(m+s)]
U <- as.matrix(ref.mat[,(1+m+s):(m+s+h)])
if(nd > 0){ND <- ref.mat[,(1+m+s+h):(m+s+h+nd)]}else{ND <- NULL}

######   Range variables

y.upper <- apply(Y,2,max)
y.lower <- apply(Y,2,min)
x.upper <- apply(X,2,max)
x.lower <- apply(X,2,min)
u.upper <- apply(U,2,max)
u.lower <- apply(U,2,min)

R.g <- 1/((m+s+h)* (y.upper - y.lower))  ## range for goods
R.x <- 1/((m+s+h)* (x.upper - x.lower))  ## range for inputs
if (optim == TRUE) {R.b <- 1/((m+s+h)* (u.upper - u.lower))} else {R.b <- 1/((h)* (u.upper - u.lower))}      ## range for bads
 
if(nd >0){xyund.mat <-(cbind(X,Y, U, ND))} else{xyund.mat <- (cbind(X,Y, U))}

xyund.base <- eval.mat



obj <- c(R.x, R.g, R.b, rep(0,n))   ## Has all inputs and outputs in it, but x and g are 
                                   ## multiplied by zero from con2.signs below
f.con1 <- as.matrix(t(xyund.mat))
con2.signs <- c(rep(0,m), rep(0,s), rep(1,h))  ## difference between this and total eff
                                               ## is the zeroes here
if(nd >0){nd.rows <- matrix(0,nd,(m+s+h))}  ## add constraint rows for non-discretionary variables                                             
if(nd > 0){f.con2 <-  rbind(diag(con2.signs),nd.rows)}else{f.con2 <- diag(con2.signs)} 

constr <- cbind(f.con2, f.con1)  ## input and output constraints

l.con <- t(c(rep(0,m+s+h), rep(1,n)))   ## convexity, sum of lambdas = 1
l.con2 <- cbind( (matrix(0,n,m+s+h)), diag(1,n))  ##  constrain each lambda to be > 0
constr <- rbind(constr,l.con, l.con2)

s.con <- cbind(diag(1,(m+s+h)), matrix(0,(m+s+h),n))     ##  nonnegativity constraints on slacks
constr <- rbind(constr,s.con)

f.dir <- c(rep("==", (m+s+h+nd+1)), rep(">=", (n)), rep("==",(m+s)), rep(">=", h))

##  set inputs and outputs at optimal levels
##  for rhs matrix
if (optim == TRUE) {  ## not sure if this part works yet
oe <- op.eff(ree.dat, eee.dat, show = FALSE)
opt.slax <- oe$opt.slax
opt.slax <- cbind(opt.slax, matrix(0,n.dmu,h + nd))
rhs.signs <- c(rep(-1,m), rep(1,s), rep(0,h), rep(0,nd))
rhs.sign.mat <- matrix(rhs.signs, n.dmu,(m+s+h+nd), byrow = TRUE)
rhs.mat <- xyund.base + (rhs.sign.mat * opt.slax)
             } else {
	                 (rhs.mat <- xyund.base)}


      results <- matrix(0,n.dmu,(m+s+h+n))
      for (i in 1:n.dmu){
                     rhs <-  c(t(rhs.mat[i,]),1,rep(0,(m+s+h+n)))
                     sol <- lp("max", obj, constr, f.dir, rhs)$solution; #print(sol)
                     results[i,] <- sol
                     }


 
opt.slax <- results[,1:(m+s+h)]

                     
eff.scores <- 1- opt.slax[,((m+s+1):(m+s+h))] * obj[(m+s+1):(m+s+h)]

if(show == TRUE){
	caption <- ifelse((optim == TRUE), "Assuming optimal operational efficiency", "Assuming status quo of outputs and inputs")
print(caption)
ee.df <- data.frame(institution = eee.dat$instnm, env.eff = eff.scores)
print(ee.df) }


env.eff.output <- list(slax.df = data.frame(instnm = eee.dat$instnm, results[,1:(m+s+h)]), opt.slax = results[,1:(m+s+h)], lambdas = results[,(m+s+h+1):ncol(results)], eff.scores = eff.scores)
    return(env.eff.output)
}

###########################################
##            total efficiency 

tot.eff <-  function (rte.dat, ete.dat = rte.dat, show = TRUE) 
{

ref.mat <- as.matrix(rte.dat[,4:ncol(rte.dat)])
eval.mat <- as.matrix(ete.dat[,4:ncol(ete.dat)])

n <- nrow(ref.mat)
n.dmu <- nrow(eval.mat)

#X <- ref.mat[,4:(m+3)]
#Y <- ref.mat[,(m+4):(m+3+s)]
#U <- ref.mat[,(3+m+s+1):ncol(dat.mat)]

X <- ref.mat[,1:m]
Y <- ref.mat[,(m+1):(m+s)]
U <- ref.mat[,(1+m+s):(m+s+h)]
ND <- ref.mat[,(1+m+s+h):(m+s+h+nd)]

 

######   Range variables

y.upper <- apply(Y,2,max)
y.lower <- apply(Y,2,min)
x.upper <- apply(X,2,max)
x.lower <- apply(X,2,min)
u.upper <- max(U) #apply(U,2,max)
u.lower <- min(U) #apply(U,2,min)

R.g <- 1/((m+s+h)* (y.upper - y.lower))  ## range for goods
R.x <- 1/((m+s+h)* (x.upper - x.lower))  ## range for inputs
R.b <- 1/((m+s+h)* (u.upper - u.lower))      ## range for bads

xyund.mat <- cbind(X,Y, U, ND)

xyund.base <- eval.mat
    

obj <- c(R.x,R.g, R.b, rep(0,n))    
f.con1 <- as.matrix(t(xyund.mat))
con2.signs <- c(rep(1,m), rep(-1,s), rep(1,h))

nd.rows <- matrix(0,nd,(m+s+h))  ## add constraint rows for non-discretionary variables                                             
f.con2 <- rbind(diag(con2.signs),nd.rows)

constr <- cbind(f.con2, f.con1)  ## input and output constraints

l.con <- t(c(rep(0,(m+s+h)), rep(1,n)))   ## convexity, sum of lambdas = 1

l.con2 <- cbind( (matrix(0,n,m+s+h)), diag(1,n))  ##  constrain each lambda to be > 0
constr <- rbind(constr,l.con, l.con2)

#constr <- rbind(constr,l.con)

s.con <- cbind(diag(1,(m+s+h)), matrix(0,(m+s+h),n))     ##  nonnegativity constraints on slacks
constr <- rbind(constr,s.con)    ##  full matrix of constraints

##  vector of signs for constraints


f.dir <- c(rep("==", (m+s+h+nd+1)), rep(">=", (n)), rep(">=", (m+s+h)))

      results <- matrix(0,n.dmu,(m+s+h + n))
      for (i in 1:n.dmu){print(dim(constr));print(length(f.dir))
                     rhs <-  c(t(xyund.base[i,]),1,rep(0,(m+s+h+n))); print(length(rhs))
                     sol <- lp("max", obj, constr, f.dir, rhs)$solution; #print(sol)
                     results[i,] <- sol
                     }
opt.slax <- results[,1:(m+s+h)]
                     
eff.scores <- 1- opt.slax %*% obj[1:(m+s+h)]  
te.df <- data.frame(institution = ete.dat$instnm, tot.eff = eff.scores)                    

if(show == TRUE){
	print("efficiency scores")
        print(te.df)} 

tot.eff.output <- list(opt.slax <- results[,1:(m+s+h)], slax.df = data.frame(instnm = ete.dat$instnm, results[,1:(m+s+h)]), lambdas = results[,(m+s+1):ncol(results)], eff.scores = eff.scores, te.df = te.df)
    return(tot.eff.output)
}
 
